State Results
# clear labels for versions
versions <- tibble(
version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|untested)$ Centered at Survey Value",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
"$\\beta$ Centered at Survey Value (Spline Smoothed)",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Survey Value",
"$Pr(S_1|untested)$ Centered at Survey Value",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
"$\\beta$ Centered at Survey Value (Spline Smoothed)",
"$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
) )
)
# read state-level results
targets_dir <- here("_targets")
state_res <- tar_read(state_v1,
store =targets_dir) %>%
bind_rows(
tar_read(state_v2,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v3,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v4,
store =targets_dir)
)%>%
bind_rows(
tar_read(state_v5,
store =targets_dir)
)%>%
bind_rows(
tar_read(state_v6,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v7,
store =targets_dir)
)
covidestim <- tar_read(covidestim_biweekly_state,
store =targets_dir)
dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
codes <- read_csv(here('data/demographic/statecodes.csv'))
# last biweek for first model is biweek 24
last_biweek_old_model <- dates %>%
group_by(biweek) %>%
summarize(mindate=min(date),maxdate=max(date)) %>%
filter("2021-11-30" >= mindate & "2021-11-30" <= maxdate)
cat("Last two week interval with previous model was biweek", last_biweek_old_model$biweek,
"\ndates", paste0("(", last_biweek_old_model$mindate, ", ",
last_biweek_old_model$maxdate, ")"))## Last two week interval with previous model was biweek 24
## dates (2021-11-19, 2021-12-02)
end_old_model <- last_biweek_old_model$maxdate
state_res <- state_res %>%
rename(state=fips) %>%
left_join(versions) %>%
mutate(state=toupper(state)) %>%
left_join(covidestim, relationship= "many-to-many") %>%
left_join(dates, relationship = "many-to-many") %>%
rename(name=state_name) %>%
group_by(biweek) %>%
mutate(mindate=min(date),
maxdate=max(date)) %>%
ungroup()
if(filter_versions){
state_res <- state_res %>%
filter(version %in% c("v3", "v5", "v6", "v7")) %>%
mutate(vlabel=case_when(
version == "v7" ~ "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
version == "v5" ~ "$\\beta$ Centered at Survey Value",
version =="v6" ~ "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value"
))
}theme_c <- function(...){
# font <- "Helvetica" #assign font family up front
theme_bw() %+replace% #replace elements we want to change
theme(
#text elements
plot.title = element_text( #title
size = 16, #set font size
face = 'bold', #bold typeface
hjust = .5,
vjust = 3),
plot.subtitle = element_text( #subtitle
size = 12,
hjust = .5,
face = 'italic',
vjust = 3), #font size
axis.title = element_text( #axis titles
size = 12), #font size
axis.text = element_text( #axis text
size = 9),
legend.text = element_text(size = 12),
# t, r, b, l
plot.margin = unit(c(1,.5,.5,.5), "cm"),
legend.position = "right",
strip.text.x = element_text(size = 11, face = "bold", color="white"),
strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
strip.background = element_rect(fill = "#3E3D3D")
) %+replace%
theme(...)
}Summarizing Concordance with Covidestim
Below, we see that although the implementation where \(\Pr(S_1|untested)\) was at the survey as well as the implementation did not vary by state or date, the mean simulation interval width was actually lower, indicating the greater concordance was not merely a result of wider bounds.
state_res %>%
select(-c(date)) %>%
distinct() %>%
mutate(interval_length =exp_cases_ub - exp_cases_lb) %>%
group_by(vlabel) %>%
summarize(mean_interval_length = mean(interval_length)) %>%
arrange(desc(mean_interval_length))state_res_differenced <- state_res %>%
select(-date) %>% distinct() %>%
group_by(state, vlabel) %>%
mutate(exp_cases_median_differenced = exp_cases_median - lag(exp_cases_median, n =1, order_by = biweek),
infections_differenced = infections - lag(infections, n = 1, order_by=biweek)) %>%
select(state, name, vlabel, contains('differenced'), biweek) %>%
filter(!is.na(exp_cases_median_differenced)) %>%
filter(biweek <=25)
# if lag is NEGATIVE, covidestim LEADS wastewater
# if lag is POSITIVE, covidestim LAGS wastewater
# since by the documentation, 'the lag k value returned by ccf(x, y) estimates
# the correlation between x[t+k] and y[t].'
get_max_lag_by_version <- function(input_df_for_version,
varnames = c("infections_differenced",
"exp_cases_median_differenced"),
comparison = "bias corrected") {
var1 <- varnames[1]
var2 <-varnames[2]
lab <- unique(input_df_for_version$vlabel)
cross_correlations <- ccf(
pull(input_df_for_version, var1),
pull(input_df_for_version, var2),
plot = FALSE,
lag.max=3)
ccf_res <- tibble(lag = cross_correlations$lag[,,1],
correlation=cross_correlations$acf[,,1])
m <- ccf_res %>% filter(correlation == max(correlation))
tibble(vlabel = unique(input_df_for_version$vlabel),
fips = unique(input_df_for_version$state),
max_lag = m$lag,
max_corr = m$correlation,
comparison = comparison,
name = unique(input_df_for_version$name))
}
cross_corr <- state_res_differenced %>%
group_by(state, vlabel) %>%
group_split() %>%
map_df(get_max_lag_by_version)
states_with_lag <- cross_corr %>%
filter(max_lag > 0 &
vlabel == "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$" &
max_corr > .55) %>%
pull(fips) %>%
unique()# corrected %>%
# mutate(in_interval = case_when(
# exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
# exp_cases_lb > infections ~ "Below Interval",
# exp_cases_ub < infections ~ "Above Interval"
# )) %>%
# filter(!is.na(in_interval)) %>%
# group_by(in_interval, state, vlabel) %>%
# summarize(n_per_county=n()) %>%
# group_by(vlabel, in_interval) %>%
# summarize(n_per_version = sum(n_per_county)) %>%
# group_by(vlabel) %>%
# mutate(total = sum(n_per_version)) %>%
# ungroup() %>%
# mutate(prop=n_per_version/total)
by_version <- state_res %>%
# filter(biweek >=6) %>%
select(-date) %>%
distinct() %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
)) %>%
filter(!is.na(in_interval)) %>%
group_by(in_interval, vlabel) %>%
summarize(n=n()) %>%
group_by(vlabel) %>%
mutate(total = sum(n)) %>%
mutate(prop=n/total)
labels <- by_version %>%
arrange(prop) %>%
pull(vlabel) %>%
as.character() %>%
unique()
by_version %>%
mutate(in_interval = factor(in_interval,
levels = c("Above Interval",
"In Interval",
"Below Interval"
))) %>%
ggplot(aes(x= fct_reorder(vlabel,prop,max),
fill = in_interval, y =prop)) +
geom_bar(stat="identity",position="stack") +
theme_c() +
coord_flip()+ scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2"),
breaks = c("Below Interval",
"In Interval",
"Above Interval")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Proportion of Simulation Intervals Where Covidestim Median\nWas Within, Above, or Below the Median") +
theme_c() +
theme_c(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1)) +
theme(plot.title = element_text(size = 20),
plot.subtitle = element_text(size=18,
face='italic',
margin=margin(4,0,4,0)),
axis.text.x=element_text(size=12),
axis.title = element_text(size=14)) +
scale_x_discrete(labels = (TeX(labels)))by_version %>%
group_by(vlabel) %>%
filter(in_interval == "In Interval") %>%
mutate(n_interval = n) %>%
ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
geom_text(aes(label=round(prop,3), x= prop+.03),
angle=0) +
geom_bar(stat="identity", fill="#596281") +
scale_y_discrete(breaks= unique(by_version$vlabel),
labels=function(x)TeX(x)) +
scale_x_continuous(expand=c(0,0),
limits=c(0,.9), n.breaks=7) +
theme_c(axis.text.y = element_text(size = 13, hjust=1),
axis.title.x = element_text(size=14),
axis.text.x=element_text(size=10)) +
labs(y="",
x="Proportion Containing the Covidestim Median",
title="Proportion Containing the Covidestim Median",
subtitle = "By Implementation")Simulation Intervals Over Time
all_versions <- state_res %>%
group_by(state) %>%
summarize(state_n=n_distinct(version)) %>%
filter(state_n == max(state_n)) %>%
pull(state)state_res %>%
filter(state %in% sample(unique(all_versions),5)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
alpha = .8,
show.legend=FALSE,
fill="#596281") +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "By State") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))Compare Versions
All Versions
subset <- state_res %>%
filter(vlabel == "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$")
# pal <- c("#247C90", "red")
#
# names(pal) <- c(as.character(unique(subset$vlabel)), "Covidestim")
state_res %>%
filter(state %in% sample(unique(all_versions),5)) %>%
filter(version %in% c("v2", "v5", "v6", "v7")) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill=vlabel),
alpha = .8,
show.legend=FALSE #,
# fill="#596281"
) +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "By State") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))Wu et al.’s Priors versus CTIS Priors (that do not vary)
colors_labs <- state_res %>%
filter(version %in% c("v1","v7")) %>%
pull(vlabel) %>% unique()
state_res %>%
filter(version %in% c("v1","v7") & state %in% sample(unique(.$state),15)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill=vlabel),
alpha = .6
) +
facet_wrap(~name,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed),
nrow=3),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
) +
viridis::scale_fill_viridis(option="magma", begin=.1,end=.9, discrete=TRUE,
breaks =(colors_labs),
labels=TeX(as.character(colors_labs))) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "By State",
fill = "") Compare States
# comp_state_pal <- c("#247C90", "red")
#
# names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")
#
#
# vlab_for_pal <- unique(subset$vlabel) %>% as.character()
color_for_bias <- pal[[which(names(pal)=="$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$")]]
comp_state_pal<- c(color_for_bias,"red")
names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")
subset %>%
# mutate(vlabel = gsub("$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$",
# "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", as.character(vlabel))) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = 'Probabilistic Bias Analysis'),
alpha = .8,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .6) +
geom_vline(xintercept=end_old_model,
linetype=2,
color="darkred",
linewidth=.5) +
facet_wrap(~name, ncol=4, scales = "free",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3.5 months",
date_labels = "%b %Y") +
theme_c(axis.text.x=element_text(size=7),
axis.text.y = element_text(size=4.5),
legend.position = "top",
legend.text=element_text(size=12),
strip.text.x=element_text(size=11,
face='plain',
color='white')) +
scale_fill_manual(values =comp_state_pal, name='') +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by State")) +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE))Compare States that Have a Lag
# states_with_lag
subset %>%
filter(state %in% states_with_lag & biweek <= 25) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = 'Probabilistic Bias Analysis'),
alpha = .8,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .6) +
# geom_vline(xintercept=end_old_model,
# linetype=2,
# color="darkred",
# linewidth=.5) +
facet_wrap(~name, ncol=3, scales = "free",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3.5 months",
date_labels = "%b %Y") +
theme_c(axis.text.x=element_text(size=7),
axis.text.y = element_text(size=4.5),
legend.position = "top",
legend.text=element_text(size=12),
strip.text.x=element_text(size=11,
face='plain',
color='white')) +
scale_fill_manual(values =comp_state_pal, name='') +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by State\nin States with a Lag")) +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE))Test Positivity and Positive Tests in States with a Lag
############################################
# POSITIVE TESTS AND TEST POSITIVITY
############################################
pltlist <- subset %>%
filter(biweek<=25) %>%
filter(state %in% states_with_lag) %>%
mutate(testpos = positive/total) %>%
group_by(across(-date)) %>%
summarize(date=min(date) + days(7)) %>%
group_by(state) %>%
mutate(adj = (1/max(testpos)) * max(positive)) %>%
group_split() %>%
map(~ {
adj <-unique(.x$adj)
.x %>%
ggplot(aes(x=date))+
geom_line(aes(y=positive, color = 'Positive Tests')) +
geom_point(aes(y=positive, color = 'Positive Tests'),
alpha=.5,
size=.8) +
geom_line(aes(y=testpos*adj, color='Test Positivity'),
linetype=2) +
geom_point(aes(y=testpos*adj, color='Test Positivity'),
alpha=.5,
size=.8) +
#scale_linetype_discrete(breaks = c(1,1,2)) +
scale_color_manual(values=c(
"Positive Tests"= "#279143",
"Test Positivity"= '#E38E4F')) +
guides(color = guide_legend(override.aes = list(linetype = c(1,2),
linewidth=c(2,2)))) +
theme_c() +
theme(axis.title = element_text(size=10),
axis.text =element_text(size=9),
legend.position="none") +
scale_y_continuous(labels=comma,
sec.axis = sec_axis(
~./adj,
name = 'Test Positivity')) +
labs(color ='',
y="Positive Tests",
x= "Date") +
facet_wrap(~name)
})
legend <- get_legend(
pltlist[[1]] +
theme(legend.position = "top",
legend.text=element_text(size=16))
)
title_gg <- ggplot() +
labs(title = "Comparing Trends in Positive Tests and Test Positivity") +
theme(plot.title=element_text(face="bold",
hjust = .5,
size = 18,
margin =margin(5,0,2,0)))
plt <- cowplot::plot_grid(plotlist=pltlist, ncol=3)
plot_grid(title_gg, legend, plt, rel_heights=c(.03, .03,.94), ncol=1)Ratio of Estimated to Observed
subset %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb/positive,
ymax = exp_cases_ub/positive,
fill = vlabel),
alpha = .9,
show.legend=FALSE,
fill= "#247C90") +
facet_wrap(~state, ncol=5,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(axis.text.x=element_text(size=6),
axis.text.y = element_text(size=8),
legend.position = "top") +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
name='',
breaks="Covidestim") +
labs(y = "Ratio of Estimated Infections to Observed",
x="",
title = paste0("Ratio of Estimated Infections to Observed by State")) +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE))Comparing States at Specific Two-week Intervals
During Alpha Wave
max_val <- subset %>%
# left_join(codes) %>%
filter(biweek %in% c(7, 13,27)) %>%
mutate(ratio=exp_cases_ub/positive) %>%
pull(ratio) %>%
max()
plt1 <- subset %>%
# left_join(codes) %>%
filter(biweek == 7) %>%
ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
xmin = exp_cases_lb/positive,
xmax = exp_cases_ub/positive)) +
geom_errorbar() +
# geom_point(aes(y=state_name,x=exp_cases_median/positive),
# color = "darkred", size=.8, alpha=.5) +
theme_c() +
theme(axis.text.y=element_text(size=10),
axis.text.x = element_text(size=11),
axis.title = element_text(size = 17),
plot.subtitle = element_text(size=14),
plot.title = element_text(size=17, face="plain")) +
labs(y="", x="Ratio of Estimated Infections\nto Observed",
title ="During Two-week Period\nin the Alpha Wave",
subtitle = "March 26, 2021 through April 8, 2021") +
xlim(0,max_val+2)
plt1During Delta Wave
subset %>%
mutate(ratio = exp_cases_median/positive) %>%
group_by(biweek) %>%
summarize(mean = mean(ratio),
mindate=min(date),
maxdate=max(date)) %>%
arrange(desc(mean)) plt2 <- subset %>%
# left_join(codes) %>%
filter(biweek == 13) %>%
ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
xmin = exp_cases_lb/positive,
xmax = exp_cases_ub/positive)) +
geom_errorbar() +
# geom_point(aes(y=state_name,x=exp_cases_median/positive),
# color = "darkred", size=.8, alpha=.5) +
theme_c() +
theme(axis.text.y=element_text(size=10),
axis.text.x = element_text(size=11),
axis.title = element_text(size = 17),
plot.subtitle = element_text(size=14),
plot.title = element_text(size=17, face="plain")) +
labs(y="", x="Ratio of Estimated Infections\nto Observed",
title ="During Two-week Period\nin the Delta Wave",
subtitle = "June 18, 2021 through July 1, 2021") +
xlim(0,max_val+2)
plt2During Omicron Wave
subset %>%
left_join(codes) %>%
filter(biweek == 27) %>%
pull(date) %>%
range()
subset %>%
left_join(codes) %>%
filter(biweek == 13) %>%
pull(date) %>%
range()
plt3 <- subset %>%
# left_join(codes) %>%
filter(biweek == 27) %>%
ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
xmin = exp_cases_lb/positive,
xmax = exp_cases_ub/positive)) +
geom_errorbar() +
theme_c() +
theme(axis.text.y=element_text(size=10),
axis.text.x = element_text(size=11),
axis.title = element_text(size = 17),
plot.subtitle = element_text(size=14),
plot.title = element_text(size=17, face="plain")) +
labs(y="", x="Ratio of Estimated Infections\nto Observed",
title ="During Two-week Period\nin the Omicron Wave",
subtitle = "December 31, 2021 through January 14, 2022") +
xlim(0,max_val+2)
plt3All Waves Together
plt <- plot_grid(plt1,plt2, plt3, nrow=1, align="none", labels="auto", label_size=19)
title <- ggplot() +
labs(title = "Ratio of Estimated Infections to Observed Infections") +
theme_c(plot.title=element_text(size=25))
plot_grid(title, plt, nrow=2,
rel_heights= c(.05,.95))## Time interval with the highest ratio of estimated cases to observed:
subset %>%
group_by(biweek) %>%
mutate(ratio=exp_cases_median/positive) %>%
summarize(mean_ratio=mean(ratio),
date = paste(range(date),collapse=" to ")) %>%
slice_max(n=1, order_by=mean_ratio)subset %>%
# left_join(codes) %>%
filter(biweek %in% c(7, 13, 27)) %>%
select(-date) %>%
distinct() %>%
group_by(state) %>%
mutate(ord = empirical_testpos[which(biweek==7)]) %>%
mutate(biweek=factor(biweek)) %>%
ggplot(aes(y=fct_reorder(state,ord),
x=empirical_testpos,fill=biweek)) +
geom_bar(stat="identity",position="dodge")+
theme_c() +
theme_c() +
labs(title = "Testing Positivity by State",
y="",
x="Biweekly Testing Positivity",
fill="") +
scale_x_continuous(n.breaks=10)subset %>%
# left_join(codes) %>%
filter(biweek %in% c(7, 13, 27)) %>%
group_by(biweek) %>%
mutate(daterange= paste(format(min(date), "%B %d %Y"),
"to",
format(max(date), "%B %d %Y")),
daterange=factor(
daterange,
levels=c(
"March 26 2021 to April 08 2021",
"June 18 2021 to July 01 2021",
"December 31 2021 to January 14 2022"))) %>%
select(-date) %>%
distinct() %>%
group_by(state) %>%
mutate(testrate=total/population,
ord = testrate[which(biweek==7)]) %>%
mutate(biweek=factor(biweek)) %>%
ggplot(aes(y=fct_reorder(name,ord,.desc=TRUE),
x=testrate,fill=daterange)) +
geom_bar(stat="identity",position="dodge") +
theme_c() +
labs(title = "Testing Rate by State",
y="",
x="Biweekly Testing Rate",
fill="") +
scale_x_continuous(n.breaks=10)compare_testrates <- subset %>%
mutate(testrate=total/population) %>%
select(-date) %>%
distinct() %>%
filter(biweek %in% c(7, 13, 27)) %>%
select(biweek,state,testrate) %>%
pivot_wider(names_from=biweek, values_from=testrate, names_prefix="biweek_") %>%
mutate(omicron_over_alpha = biweek_27/biweek_7,
omicron_over_delta = biweek_27/biweek_13) %>%
select(state,contains("omicron")) %>%
pivot_longer(c(omicron_over_alpha,omicron_over_delta)) %>%
group_by(name) %>%
summarize(mean_val = mean(value))
cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_alpha") %>% pull(mean_val), "times the testing rate during this time interval in the alpha wave.") ## The testing rate during this two-week interval during the omicron wave was 2.440557 times the testing rate during this time interval in the alpha wave.
cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_delta") %>% pull(mean_val), "times the testing rate during this time interval in the delta wave.") ## The testing rate during this two-week interval during the omicron wave was 4.931176 times the testing rate during this time interval in the delta wave.
Rank Ratio of Estimated to Observed Over Time
ranks <- subset %>%
mutate(ratio = exp_cases_median/positive,
tested = total/population) %>%
# one observation per biweek per state
select(biweek, date, ratio, name,tested) %>%
group_by(biweek,ratio,name,tested) %>%
summarize(date=min(date)) %>%
# rank for each time interval
group_by(biweek) %>%
mutate(rank = rank(ratio),
rank_tested=rank(-tested))
r <- ranks %>%
mutate(rank_group = case_when(
rank <= 10 ~ "Top 10",
rank > 41 ~ "Bottom 10",
TRUE ~ "Middle" )) %>%
group_by(name, rank_group) %>%
mutate(n=n()) %>%
group_by(name) %>%
mutate(total =n(),
max_group = rank_group[which.max(n)]) %>%
filter( max(n) >24) %>%
filter(max_group != "Middle") %>%
ungroup()
top_10 <- r %>% filter(rank_group=="Top 10") %>%
arrange(desc(n)) %>% pull(name) %>%
unique() %>%paste(collapse=", ")
cat("States that consistently have the lowest ratios:", top_10)## States that consistently have the lowest ratios: Rhode Island, Massachusetts, District of Columbia, Alaska, New York, Vermont
bottom_10 <- r %>% filter(rank_group=="Bottom 10") %>%
arrange(desc(n)) %>% pull(name) %>%
unique() %>%paste(collapse=", ")
cat("States that consistently have the highest ratios:", bottom_10)## States that consistently have the highest ratios: Mississippi, South Dakota, Oklahoma, Nebraska, Tennessee
end_labs <- r %>%
ungroup() %>%
filter(date==max(date)) %>%
mutate(date = date + days(10)) %>%
select(name, rank, date) %>%
ungroup()##############################
# RANK PLOT OVER TIME
##############################
#
# set.seed(999)
#
# pal1 <-viridis_pal(option="viridis", end = .9, begin=.3)(3)
# pal2 <- viridis_pal(option="rocket", end = .9, begin=.3)(2)
# pal3 <- viridis_pal(option="magma", end = .9, begin=.3)(2)
# pal4 <- viridis_pal(option="mako", end = .8, begin=.3)(2)
# pal5 <- viridis_pal(option="cividis", end = .9, begin=.1)(3)
#
#
# rankpal <- c(pal1, pal2, pal3,pal4,pal5)
# indexes <- sample.int(length(rankpal), replace=FALSE)
# rankpal <- rankpal[indexes]
set.seed(123)
rankpal <- c("#b4ddd4", "#7b2c31", "#65d04b",
"#da239b", "#b7d165", "#453fbc",
"#658114", "#af3fe8", "#ffb947",
"#0a60a8", "#f87945", "#16d0ae")
rankpal <- c("#851657", "#be3acd", "#19a71f",
"#824598", "#ed820a", "#BB8E37",
"#974810", "#1f84ec", "#d02023",
"#059dc5", "#f23387", "#16d0ae")
indexes <- sample.int(length(rankpal), replace=FALSE)
rankpal <- rankpal[indexes]
r %>%
# filter(rank_group!="Middle") %>%
ggplot() +
geom_point(aes(x=date,y=rank,
group=name,
color= name),
size=.5) +
geom_line(aes(x=date,y=rank,
group=name,
color= name)) +
# facet_wrap(~max_group) +
ggrepel::geom_text_repel( aes(x= date-days(4),
y = rank,
color=name,
label = name),
end_labs,
min.segment.length=2,
nudge_y=0,
hjust=0,
size=3,
direction="y",
force_pull=9,
box.padding=.15) +
theme_c(legend.position="none",
axis.text.x = element_text(angle =0,size=14)) +
# theme(plot.subtitle =element_text(size=18),
# axis.title.x=element_text(size=16),
# axis.title.y=element_text(size=16)) +
# scale_color_brewer(palette="Dark2") +
scale_color_manual(values=rankpal) +
scale_x_date(date_breaks="2 months", date_labels = "%b %Y",
limits = c(ymd("2021-01-01"),
ymd("2022-04-15"))) +
labs(y = "Rank of Ratio",
title = "Rank of Estimated Infections to Observed Infections Over Time",
subtitle = "For States Ranked in the Top or Bottom 10\nfor More Than 80% of Time Intervals",
x="Date") +
scale_y_reverse(breaks=seq(1,51, by=9))Summer Testing Rate
subset %>%
mutate(testrate = total/population) %>%
group_by(biweek) %>%
mutate(m = median(testrate),
date=min(date)) %>%
ggplot(aes(x=date,y=testrate, group=state)) +
geom_line(alpha=.2) +
geom_line(aes(y=m, color='Median'), size=2) +
labs(y = "Total Number of Tests\nOver Population Size",
title ="Testing Rate Over Time",
x="") +
theme_c() +
theme(axis.title.x = element_text(size=16),
axis.title.y = element_text(size=16),
legend.position='top') +
geom_vline(xintercept = ymd("2021-07-16"), linetype = 2, linewidth=.5) +
geom_vline(xintercept = ymd("2021-06-18"), linetype = 2, linewidth=.5) +
scale_x_date(date_breaks="2 months",
date_labels = "%b %Y") +
scale_color_manual(values=c(Median="darkred"),name='')Variant Data
m <- state_res %>%
filter(state == "MI" & version=="v7") %>%
pull(exp_cases_ub) %>%
max()
variant <- tar_read(variant, store =targets_dir)
varpal <- c("#7BD8DA","#709f0f", "#e71761", "#9245c8",
"#97481c", "#5054b1", "#DA7BA8", "#26B392")
state_res %>%
filter(state == "MI" & version %in% c("v7")) %>%
# filter(state %in% sample(corrected$state,5)) %>%
# filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill =vlabel),
# fill = "#3E3D3D",
alpha = .5,
show.legend=FALSE) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(legend.position="top") +
geom_line(aes(x=week, y=share*m,
color =variant_category,
group=variant_category),
data=variant,
linewidth=1.3) +
# scale_fill_manual(values =pal,
# labels = TeX(names(pal)),
# breaks='Covidestim',
# name='') +
scale_y_continuous(sec.axis = sec_axis( trans=~./m, name="Variant Proportion"),
labels = comma) +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by Version in Michigan"),
color = "SARS-CoV-2 Variant",
x="Date") +
guides(color = guide_legend(override.aes = list(linewidth =3),
ncol=2,title.position="top")) +
# scale_color_brewer(palette="Accent")
scale_color_manual(values=varpal) +
scale_fill_manual(values=pal)Comparing Overlap
Weaknesses in this approach:
- it doesn’t distinguish between a very small interval being covered (i.e. when there are very few cases) and a larger interval being covered (i.e. when there are many cases)
- it doesn’t consider the width of the PB interval
state_overlaps <- subset %>%
select(-date) %>%
distinct() %>%
mutate(lower_overlap = map2_dbl(infections.lo, exp_cases_lb, ~max(.x,.y)),
upper_overlap = map2_dbl(infections.hi, exp_cases_ub, ~min(.x,.y)),
overlap = case_when(
upper_overlap-lower_overlap > 0 ~ upper_overlap-lower_overlap,
TRUE ~ 0
)) %>%
select(contains('overlap'), name, biweek,
population, positive, total, exp_cases_lb,
exp_cases_ub, contains('infections')) %>%
mutate(overlap_not_norm = overlap,
overlap = overlap/population,
fraction_overlap = overlap_not_norm/(infections.hi-infections.lo))
#############
# BY STATE
#############
state_overlaps %>%
group_by(name) %>%
summarize(mean_overlap=mean(fraction_overlap)) %>%
ggplot(aes(y= fct_reorder(name, mean_overlap),
x = mean_overlap)) +
geom_bar(stat="identity") +
theme_c() +
labs(y="",
x="Mean Overlap",
title = "Mean Overlap with Covidestim by State",
subtitle = "Overlap Normalized by Width of Covidestim Interval")##########################
# BY TWO-WEEK INTERVAL
##########################
state_overlaps %>%
group_by(biweek) %>%
summarize(mean_overlap=mean(fraction_overlap)) %>%
left_join(dates) %>%
group_by(biweek) %>%
summarize(date = median(date),
mean_overlap=unique(mean_overlap)) %>%
ggplot(aes(x=date,
y = mean_overlap)) +
geom_bar(stat="identity", alpha =.7) +
geom_vline(xintercept=end_old_model,linetype=2,
color='darkred',
linewidth=1.1) +
theme_c() +
labs(y="",
x="Mean Overlap",
title = "Mean Overlap with Covidestim by Time Interval",
subtitle = "Overlap Normalized by Width of Covidestim Interval") +
scale_x_date(date_breaks="2 months", date_labels="%b %Y")##########################
# BY TWO-WEEK INTERVAL
##########################
# state_overlaps %>%
# mutate(posrate=positive/total) %>%
# filter(name %in% sample(unique(state_overlaps$name), 5)) %>%
# ggplot(aes(x=posrate, y = fraction_overlap)) +
# geom_point(alpha=.8) +
# theme_c() +
# # facet_wrap(~name, scales="free_y") +
# labs(x = "Positivity Rate", y = "Overlap") +
# scale_y_continuous(labels=comma)set.seed(123)
state_sample <- sample(unique(state_overlaps$name), 16)
state_overlaps %>%
mutate(posrate=positive/total) %>%
# filter(name %in% state_sample) %>%
ggplot(aes(x=posrate, y = fraction_overlap)) +
geom_point(alpha=.8) +
theme_c() +
facet_wrap(~name, scales="free_y") +
labs(x = "Positivity Rate", y = "Overlap",
title = "Comparing Fraction of Interval Covered by Positivity Rate",
subtitle = "Each Point Corresponds to a State-Biweek")state_overlaps %>%
mutate(posrate=positive/total) %>%
# filter(name %in% state_sample) %>%
ggplot(aes(x=posrate, y = fraction_overlap)) +
geom_point(alpha=.8) +
theme_c() +
# facet_wrap(~name, scales="free_y") +
labs(x = "Positivity Rate", y = "Overlap",
title = "Comparing Fraction of Interval Covered by Positivity Rate",
subtitle = "Each Point Corresponds to a State-Biweek")state_overlaps %>%
mutate(testrate=total/population) %>%
filter(name %in% state_sample) %>%
ggplot(aes(x=testrate, y = fraction_overlap)) +
geom_point(alpha=.8) +
theme_c() +
facet_wrap(~name, scales="free") +
labs( x= "Total Tests / Population Size",
y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
title = "Comparing Fraction of Interval Covered by Testing Rate",
subtitle = "Each Point Corresponds to a State-Biweek")state_overlaps %>%
mutate(testrate=total/population) %>%
filter(name %in% state_sample) %>%
ggplot(aes(x=testrate, y = fraction_overlap)) +
geom_point(alpha=.8) +
theme_c() +
labs( x= "Total Tests / Population Size",
y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
title = "Comparing Fraction of Interval Covered by Testing Rate",
subtitle = "Each Point Corresponds to a State-Biweek")Relationship Between Agreement and Cumulative Incidence (Pre-Omicron)
state_population_link <- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv"
state_pop <- read_csv(state_population_link)
statecodes <- read_csv(here("data/demographic/statecodes.csv"))
state_pop <- state_pop %>%
left_join(statecodes, by = c("NAME" = "state")) %>%
select(population = POPESTIMATE2019,
state = code,
name = NAME) %>%
filter(!is.na(state))
state_deaths <- tar_read(state_deaths, store=targets_dir)
set.seed(123)
state_sample <- sample(unique(state_deaths$state), 5)
state_deaths %>%
# filter(state %in% state_sample) %>%
ggplot(aes(x=date,y=deaths/cases)) +
geom_line() +
facet_wrap(~state) +
theme_c() +
geom_vline(xintercept=end_old_model, color='darkred', linetype=2)comp_deaths <- state_deaths %>%
mutate(omicron=ifelse(date <= mdy("12-02-2021"),
"pre_omicron", "omicron")) %>%
group_by(state, omicron) %>%
summarize(cumulative_deaths = sum(deaths),
cumulative_cases= sum(cases)) %>%
pivot_wider(names_from = omicron,
values_from = c(cumulative_deaths, cumulative_cases)) %>%
mutate(pre_deaths_over_cases = cumulative_deaths_pre_omicron/cumulative_cases_pre_omicron,
omicron_deaths_over_cases = cumulative_deaths_omicron/cumulative_cases_omicron)
comp_deaths %>%
select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
pivot_longer(contains("over_cases")) %>%
group_by(state) %>%
mutate(pre = value[which(name=="pre_deaths_over_cases")]) %>%
mutate(name = case_when(
grepl("omicron", name) ~ "Omicron",
grepl("pre", name) ~ "Before Omicron"
)) %>%
ggplot(aes(y=fct_reorder(state,pre), x= value, fill=name)) +
geom_bar(stat='identity', position='dodge') +
theme_c(legend.title = element_text(face="bold", size=14)) +
labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
y="",
fill = "Time Period",
title = "Total Deaths Over Total Cases in the Time Period Before Omicron\n Compared to During Omicron") +
scale_x_continuous(n.breaks=7)comp_deaths %>%
select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
mutate(pct_change =( omicron_deaths_over_cases -pre_deaths_over_cases ) / pre_deaths_over_cases ) %>%
ggplot(aes(y=fct_reorder(state,pct_change), x= pct_change)) +
geom_bar(stat='identity', position='dodge') +
theme_c(legend.title = element_text(face="bold", size=11),
axis.text.x=element_text(size=14),
plot.subtitle = element_text(size=16,
face="plain",
margin=margin(0,0,4,0))) +
labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
y="",
fill = "Time Period",
title = TeX("Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
subtitle="Compared to During Omicron") +
scale_x_continuous(labels=scales::percent)comp_deaths %>%
select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
mutate(pct_change =( omicron_deaths_over_cases - pre_deaths_over_cases ) / pre_deaths_over_cases ) %>%
left_join(state_overlaps, by = c('state' = 'name')) %>%
group_by(state) %>%
summarize(mean_overlap=mean(fraction_overlap),
pct_change=unique(pct_change)) %>%
ggplot(aes(x=pct_change,y=mean_overlap)) +
geom_point(size=2) +
theme_c( plot.subtitle = element_text(size=16,
face="plain",
margin=margin(0,0,4,0))) +
scale_x_continuous(labels=scales::percent) +
labs(x="Percent Change", y = "Mean Overlap\n(By State)",
title = TeX("Relationship Between the Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
subtitle="Compared to During Omicron versus the Mean Overlap")comp_deaths %>%
rename(name=state) %>%
ungroup() %>%
left_join(state_pop[,c("name", "population")]) %>%
mutate(frac_infected = cumulative_cases_pre_omicron/population) %>%
left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
filter(biweek >= 25) %>%
group_by(name) %>%
summarize(mean_overlap=mean(fraction_overlap),
frac_infected=unique(frac_infected)) %>%
ggplot(aes(x=frac_infected, y =mean_overlap)) +
geom_point() +
theme_c()covidestim_link <- "https://covidestim.s3.us-east-2.amazonaws.com/latest/state/estimates.csv"
covidestim <- read_csv(covidestim_link)
# join data from each source to include dates before and after 2021-12-02
covidestim <- covidestim %>%
select(date, contains("infections"), state) %>%
filter(year(date) > 2020) %>%
mutate(week = week(date), year = year(date))
covidestim %>%
group_by(state) %>%
summarize(infections=sum(infections))%>%
rename(name=state) %>%
left_join(state_pop[,c("name", "population")]) %>%
mutate(frac_infected = infections/population) %>%
left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
filter(biweek >= 25) %>%
group_by(name) %>%
summarize(mean_overlap=mean(fraction_overlap),
frac_infected=unique(frac_infected)) %>%
ggplot(aes(x=frac_infected, y =mean_overlap)) +
geom_point() +
theme_c()